In [1]:
# load required libraries
options(stringsAsFactors = F)
options (repr.plot.width = 14, repr.plot.height = 6)
suppressPackageStartupMessages({
library(Seurat)
library(harmony)
library(ggplot2)
library(dplyr)
library(Matrix)
library(Hmisc)
library(ggsci)
library(viridis)
library(RColorBrewer)
library(ggrepel)
library(cowplot)
})
set.seed(123)
In [2]:
# load samples
sample_ctrl_220424 <- readRDS("../../1_CAD_Preprocessing/CTRL220424/rds/DCM1_clean_clustered.rds")
sample_ctrl_220424 <- subset(sample_ctrl_220424, idents = 2, invert = T)
sample_ctrl_220424$source <- "DCM1"
sample_ctrl_220424

sample_ctrl_220519 <- readRDS("../../1_CAD_Preprocessing/CTRL220519/rds/DCM2_clean_clustered.rds")
sample_ctrl_220519$source <- "DCM2"
sample_ctrl_220519

sample_ctrl_220615 <- readRDS("../../1_CAD_Preprocessing/CTRL220615/rds/DCM3_clean_clustered.rds")
sample_ctrl_220615$source <- "DCM3"
sample_ctrl_220615
An object of class Seurat 
33538 features across 13422 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 
33538 features across 9030 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
An object of class Seurat 
33538 features across 8825 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 3 dimensional reductions calculated: pca, tsne, umap
In [3]:
# merge sample and normalize
scList <- list(sample_ctrl_220424, sample_ctrl_220519, sample_ctrl_220615)
sample <- merge(scList[[1]], scList[2:3])
sample <- subset(sample, nFeature_RNA >= 500 & nFeature_RNA <= 5000 & percent.mt <= 10)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)

# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample
Warning message in CheckDuplicateCellNames(object.list = objects):
“Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.”
Centering and scaling data matrix

An object of class Seurat 
33538 features across 29660 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
In [4]:
# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)
Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
In [5]:
# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Warning message:
“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
No description has been provided for this image
No description has been provided for this image
In [6]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_DCM.pdf", width = 12, height = 10)
No description has been provided for this image
In [7]:
# add resolution
sample <- FindClusters(sample, resolution = 1, verbose = FALSE)
DimPlot(sample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(sample, label = TRUE, reduction = "tsne", group.by = "ident")
No description has been provided for this image
No description has been provided for this image
In [6]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(sample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_DCM.pdf", width = 16, height = 10)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells):
“The following requested variables were not found: HPGD5, TPASB1”
No description has been provided for this image
In [7]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_res1.pdf", width = 15, height = 10)
No description has been provided for this image
In [8]:
# explore marker genes
FeaturePlot(sample, label = T, features=c("LAMA2"), cols=c("grey90","red3"), reduction="umap", pt.size = 0.1)
No description has been provided for this image
In [9]:
# save the sample
saveRDS(sample, file = "rds/sample_DCM.rds")
In [10]:
# remove low quality clusters
subsample <- subset(sample, idents = c(3, 16, 10, 14, 15, 22, 23, 24, 25), invert = T)
subsample <- NormalizeData(subsample, normalization.method = "LogNormalize", scale.factor = 10000)
dim(GetAssayData(subsample, slot = "counts"))

# find variable genes and scale data by number of UMIs and mitochondrial gene percentage
subsample <- FindVariableFeatures(subsample, selection.method = "vst")
length(subsample@assays$RNA@var.features)
subsample <- ScaleData(subsample)

# run pca and harmony
subsample <- RunPCA(subsample, verbose = FALSE)
subsample <- RunHarmony(subsample, group.by.vars = "source", verbose = FALSE)
DimPlot(subsample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(subsample, ndims = 30)
  1. 33538
  2. 23608
2000
Centering and scaling data matrix

Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
In [11]:
# dimension reduction and clustering
pca_dims <- 1:30
subsample <- RunTSNE(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- RunUMAP(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindNeighbors(subsample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
subsample <- FindClusters(subsample, resolution = 0.5, verbose = FALSE)
DimPlot(subsample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(subsample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
No description has been provided for this image
No description has been provided for this image
In [12]:
# quality control by cluster
VlnPlot(subsample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_subsample_by_cluster_DCM.pdf", width = 10, height = 10)
No description has been provided for this image
In [13]:
# add resolution
subsample <- FindClusters(subsample, resolution = 0.8, verbose = FALSE)
DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
No description has been provided for this image
No description has been provided for this image
In [14]:
# plot marker genes list
markers <- unique(c("PECAM1","VWF","CDH5","FLT1","FLI1","LMO2","CLEC14A","FABP4","IFI27","EGFL7", # EC
"CCL14","ACKR1","NRP2","NR2F2", # VEC
"HEY1","GJA5","VEGFC","SOX17", # AEC
"ABCC9", "GUCY1A2", "RGS5", "EGFLAM", # Pericyte
"CCL21", # LEC
"CLDN1", # Mesothelial
"ACTA2","MYH11","MYL9","TAGLN","TPM2","RGS5","PPP1R14A", # SMC
"ACTA2","MYH11","LUM","DCN","PPP1R14A", # MyoFB
"LUM","DCN","FBLN1","APOD","CFD","SFRP2", # FB
"PLP1","NRXN1","S100B","MPZ","GPM6B", # OLG
"CD3D","CD3E","IL7R","CXCR4","CCL5","CD52", # T
"KLRD1","NKG7","CCL5", # NK
"CD68","AIF1","MPEG1","IL1B","CXCL8","CCL3","CXCR4", # Macrophage
"CD79A","MZB1","JCHAIN","IGKC","IGHG1","IGHG2","IGHA1", # B
"TPASB1","TPSB2","CPA3","AREG","HPGD5")) # Mast
DotPlot(subsample, features=rev(markers), cols=c("grey90","red3"), group.by="seurat_clusters") + theme(axis.text.x=element_text(angle=45, hjust=1))
ggsave("figure/dotplot_CAD_markers_subsample_DCM.pdf", width = 16, height = 10)
Warning message in FetchData.Seurat(object = object, vars = features, cells = cells):
“The following requested variables were not found: HPGD5, TPASB1”
No description has been provided for this image
In [15]:
# plot tsne and umap
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)
No description has been provided for this image
In [16]:
# explore marker genes
gene <- "LUM"
a <- FeaturePlot(subsample, label = T, features=gene, cols=c("grey90","red3"), reduction="tsne", pt.size = 0.1)
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "ident")
plot_grid(a, b, ncol = 2)

c <- FeaturePlot(subsample, label = T, features=gene, cols=c("grey90","red3"), reduction="umap", pt.size = 0.1)
d <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "ident")
plot_grid(c, d, ncol = 2)
No description has been provided for this image
No description has been provided for this image
In [17]:
# save the subsample
saveRDS(subsample, file = "rds/subsample_DCM.rds")
In [18]:
# celltype assignment
subsample <- subset(subsample, idents = 18, invert = T) # remove cluster 18
cluster = c(0:17)
celltype = c("FB","EC","EC","T","SMC","SMC","PC","Mac","FB","LEC","PC",
            "EC","EC","EC","MC","GC","GC","T")
subsample$celltype = plyr::mapvalues(x=Idents(subsample), from=cluster, to=celltype)
a <- DimPlot(subsample, label = TRUE, reduction = "umap", group.by = "celltype")
b <- DimPlot(subsample, label = TRUE, reduction = "tsne", group.by = "celltype")
plot_grid(a, b, ncol = 2)
No description has been provided for this image
In [19]:
# save the annotated subsample
saveRDS(subsample, file = "rds/subsample_DCM_annotated_level1.rds")
In [20]:
# calculate markers
markers_cluster <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_cluster, "meta/markers_DCM_by_cluster.csv")
In [21]:
# calculate celltype markers
subsample$celltype <- factor(subsample$celltype, levels = c("VEC","CEC","AEC","LEC","MC","FB","SMC","PC1","PC2","Mac","T","NKT","GC1","GC2"))
Idents(subsample) <- subsample$celltype
markers_celltype <- FindAllMarkers(subsample, only.pos = T, verbose = F)
write.csv(markers_celltype, "meta/markers_DCM_by_celltype.csv")
In [22]:
# list the session info
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/zju/ty/miniconda/envs/singlecell/lib/libopenblasp-r0.3.3.so;  LAPACK version 3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       

time zone: Asia/Shanghai
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] cowplot_1.1.1      ggrepel_0.9.4      RColorBrewer_1.1-3 viridis_0.6.4     
 [5] viridisLite_0.4.2  ggsci_3.0.0        Hmisc_5.1-1        Matrix_1.6-1.1    
 [9] dplyr_1.1.4        ggplot2_3.5.1      harmony_0.1.1      Rcpp_1.0.11       
[13] SeuratObject_4.1.4 Seurat_4.4.0      

loaded via a namespace (and not attached):
  [1] rstudioapi_0.17.1      jsonlite_1.8.7         magrittr_2.0.3        
  [4] spatstat.utils_3.1-1   rmarkdown_2.25         farver_2.1.1          
  [7] ragg_1.2.5             vctrs_0.6.4            ROCR_1.0-11           
 [10] spatstat.explore_3.2-3 base64enc_0.1-3        htmltools_0.5.6.1     
 [13] Formula_1.2-5          sctransform_0.4.0      parallelly_1.36.0     
 [16] KernSmooth_2.23-22     htmlwidgets_1.6.2      ica_1.0-3             
 [19] plyr_1.8.9             plotly_4.10.2          zoo_1.8-12            
 [22] uuid_1.1-1             igraph_1.5.1           mime_0.12             
 [25] lifecycle_1.0.3        pkgconfig_2.0.3        R6_2.5.1              
 [28] fastmap_1.1.1          fitdistrplus_1.1-11    future_1.33.0         
 [31] shiny_1.7.5            digest_0.6.33          colorspace_2.1-0      
 [34] patchwork_1.3.0.9000   tensor_1.5             irlba_2.3.5.1         
 [37] textshaping_0.4.0      labeling_0.4.3         progressr_0.14.0      
 [40] fansi_1.0.5            spatstat.sparse_3.0-2  httr_1.4.7            
 [43] polyclip_1.10-6        abind_1.4-5            compiler_4.3.1        
 [46] withr_2.5.1            htmlTable_2.4.1        backports_1.4.1       
 [49] MASS_7.3-60            tools_4.3.1            foreign_0.8-85        
 [52] lmtest_0.9-40          httpuv_1.6.11          future.apply_1.11.0   
 [55] nnet_7.3-19            goftest_1.2-3          glue_1.6.2            
 [58] nlme_3.1-163           promises_1.2.1         grid_4.3.1            
 [61] pbdZMQ_0.3-10          checkmate_2.2.0        Rtsne_0.16            
 [64] cluster_2.1.4          reshape2_1.4.4         generics_0.1.3        
 [67] gtable_0.3.4           spatstat.data_3.0-1    tidyr_1.3.1           
 [70] data.table_1.14.8      sp_2.1-0               utf8_1.2.3            
 [73] spatstat.geom_3.2-5    RcppAnnoy_0.0.21       RANN_2.6.1            
 [76] pillar_1.9.0           stringr_1.5.0          IRdisplay_1.1         
 [79] later_1.3.1            splines_4.3.1          lattice_0.21-9        
 [82] survival_3.5-7         deldir_1.0-9           tidyselect_1.2.0      
 [85] miniUI_0.1.1.1         pbapply_1.7-2          knitr_1.44            
 [88] gridExtra_2.3          scattermore_1.2        xfun_0.40             
 [91] matrixStats_1.0.0      stringi_1.7.12         lazyeval_0.2.2        
 [94] evaluate_0.22          codetools_0.2-19       tibble_3.2.1          
 [97] cli_3.6.3              uwot_0.1.16            IRkernel_1.3.2        
[100] rpart_4.1.21           systemfonts_1.1.0      xtable_1.8-4          
[103] reticulate_1.34.0      repr_1.1.6             munsell_0.5.0         
[106] globals_0.16.2         spatstat.random_3.1-6  png_0.1-8             
[109] parallel_4.3.1         ellipsis_0.3.2         listenv_0.9.0         
[112] scales_1.3.0           ggridges_0.5.4         leiden_0.4.3          
[115] purrr_1.0.2            crayon_1.5.2           rlang_1.1.4           
In [ ]: